# libs ----
library(sf)
library(geomander)
library(tidyverse)
library(redist)
library(ppmf)
library(patchwork)
library(wacolors)
readr::local_edition(1)
# paths
ppmf19_path <- "data-raw/ppmf_19.csv"
ppmf12_path <- "data-raw/ppmf_12.csv"
ppmf4_path <- "data-raw/ppmf_04.csv"

# helper ----
source('R/00_custom_functions.R')

# data ----
ppmf19 <- read_ppmf('NY', ppmf19_path)
ppmf12 <- read_ppmf('NY', ppmf12_path)
ppmf4 <- read_ppmf('NY', ppmf4_path)

# subset
ppmf19 <- ppmf19 %>% filter(TABBLKCOU == '087')
ppmf12 <- ppmf12 %>% filter(TABBLKCOU == '087')
ppmf4 <- ppmf4 %>% filter(TABBLKCOU == '087')

# agg
ppmf19 <- ppmf19 %>% add_geoid() %>% agg()
ppmf19 <- ppmf19 %>%
    rename_with(~ add_pref(.x,pref = 'v19'), starts_with(c('pop', 'vap')))
ppmf12 <- ppmf12 %>% add_geoid() %>% agg()
ppmf12 <- ppmf12 %>%
    rename_with(~ add_pref(.x,pref = 'v12'), starts_with(c('pop', 'vap')))
ppmf4 <- ppmf4 %>% add_geoid() %>% agg()
ppmf4 <- ppmf4 %>%
    rename_with(~ add_pref(.x,pref = 'v4'), starts_with(c('pop', 'vap')))

# add comparison
census <- create_block_table('NY', 'Rockland')

# join ----
tb <- census %>%
    left_join(ppmf19, by = 'GEOID') %>%
    left_join(ppmf12, by = 'GEOID') %>%
    left_join(ppmf4, by = 'GEOID')
# set nas to 0
tb[is.na(tb)] <- 0

# subset ----
er_geoids <- readRDS('data/ERCSD/er_geoids.Rds')
er <- tb %>% filter(GEOID %in% er_geoids)

# Add enacted wards ----
man <- readxl::read_xlsx('data/ERCSD/ercsd_manual_block_dists.xlsx',
                         col_names = c('rn', 'ward'))
er <- er %>% mutate(rn = row_number())

er <- er %>% left_join(man, by = 'rn')

# load in wru preds; thanks to Evan
load('data-raw/ramapo_wruPreds.rData') # vf.wru
load('data-raw/ramapo_dp4Preds.rData') # vf.4
load('data-raw/ramapo_dp12Preds.rData') # vf.12
load('data-raw/ramapo_dp19Preds.rData') # vf.19

vf.wru <- vf.wru %>% select(GEOID10, starts_with('pred'))
vf.wru <- vf.wru %>%
    rowwise() %>%
    mutate(max_group = max(c(pred.whi, pred.bla, pred.his, pred.asi, pred.oth))) %>%
    filter(!is.na(pred.whi))

vf.4 <- vf.4 %>% select(GEOID10, starts_with('pred'))
vf.4 <- vf.4 %>%
    rowwise() %>%
    filter(!is.na(pred_whi))

vf.12 <- vf.12 %>% select(GEOID10, starts_with('pred'))
vf.12 <- vf.12 %>%
    rowwise() %>%
    filter(!is.na(pred_whi))

vf.19 <- vf.19 %>%
    select(GEOID10, starts_with('pred')) %>%
    rowwise() %>%
    filter(!is.na(pred_whi))


vf.wru_agg <- vf.wru %>% group_by(GEOID10) %>%
    summarize(across(.fns = sum)) %>%
    filter(!is.na(pred.whi)) %>% rename(GEOID = GEOID10)
vf.wru_agg <- janitor::clean_names(vf.wru_agg)
vf.wru_agg <- vf.wru_agg %>%
    mutate(pred_reg = pred_whi + pred_bla + pred_his + pred_asi + pred_oth)

vf.4_agg <- vf.4 %>% group_by(GEOID10) %>%
    summarize(across(.fns = sum)) %>%
    filter(!is.na(pred_whi)) %>% rename(geoid = GEOID10) %>%
    mutate(pred_reg = pred_whi + pred_bla + pred_his + pred_asi + pred_oth)
vf.4_agg <- vf.4_agg %>%
    rename_with(~ add_pref(.x,pref = 'v4'), starts_with(c('pred')))

vf.12_agg <- vf.12 %>% group_by(GEOID10) %>%
    summarize(across(.fns = sum)) %>%
    filter(!is.na(pred_whi)) %>% rename(geoid = GEOID10) %>%
    mutate(pred_reg = pred_whi + pred_bla + pred_his + pred_asi + pred_oth)
vf.12_agg <- vf.12_agg %>%
    rename_with(~ add_pref(.x,pref = 'v12'), starts_with(c('pred')))

vf.19_agg <- vf.19 %>% group_by(GEOID10) %>%
    summarize(across(.fns = sum)) %>%
    filter(!is.na(pred_whi)) %>% rename(geoid = GEOID10) %>%
    mutate(pred_reg = pred_whi + pred_bla + pred_his + pred_asi + pred_oth)
vf.19_agg <- vf.19_agg %>%
    rename_with(~ add_pref(.x,pref = 'v19'), starts_with(c('pred')))


vf <- vf.wru_agg %>% left_join(vf.4_agg) %>% left_join(vf.12_agg)
vf <- vf %>% left_join(vf.19_agg)
vf <- vf %>% rename(GEOID = geoid)


er_map <- redist_map(er, pop_tol = 0.05, total_pop = v19_pop, ndists = 9)


er_map <- er_map %>% left_join(vf)
er_map[is.na(er_map)] <- 0

set.seed(02138)
sims <- redist_smc(map = er_map, nsims = 10000, compactness = 0.9)

sims <- sims %>% add_reference(ref_plan = er$ward) %>%
    mutate(min_pct_vap19 = group_frac(map = er_map, group_pop = v19_vap_black + v19_vap_hisp,
                                      total_pop = v19_vap),
           min_pct_vap12 = group_frac(map = er_map, group_pop = v12_vap_black + v12_vap_hisp,
                                      total_pop = v12_vap),
           min_pct_vap4 = group_frac(map = er_map, group_pop = v4_vap_black + v4_vap_hisp,
                                     total_pop = v4_vap),
           min_pct_vap = group_frac(map = er_map, vap_black + vap_hisp,
                                    total_pop = vap),
           min_pct_v19pred = group_frac(er_map, v19_pred_bla + v19_pred_his,
                                        v19_pred_reg),
           min_pct_v12pred = group_frac(er_map, v12_pred_bla + v12_pred_his,
                                        v12_pred_reg),
           min_pct_v4pred = group_frac(er_map, v4_pred_bla + v4_pred_his,
                                       v4_pred_reg),
           min_pct_pred = group_frac(er_map, pred_bla + pred_his,
                                     pred_reg),
           )

{vf %>% ggplot(aes(x = pred_whi/pred_reg, y = v19_pred_whi/pred_reg)) +
        geom_count(alpha = 0.5, color = wacolors$skagit[1]) + geom_abline(linetype = 'dashed') +
        coord_fixed() +
        theme_ppmf() + geom_smooth(se = FALSE, color =  wacolors$skagit[6]) +
        theme(legend.position = 'none') +
        labs(x = 'Census 2010', y = 'DAS-19.61', title = 'Predicted White Registrants')} +
    {vf %>% ggplot(aes(x = pred_bla/pred_reg, y = v19_pred_bla/pred_reg)) +
            geom_count(alpha = 0.5, color = wacolors$skagit[2]) + geom_abline(linetype = 'dashed') +
            coord_fixed() +
            theme_ppmf() + geom_smooth(se = FALSE, color =  wacolors$skagit[6]) +
            theme(legend.position = 'none') +
            labs(x = 'Census 2010', y = 'DAS-19.61', title = 'Predicted Black Registrants')} +
    {vf %>% ggplot(aes(x = pred_his/pred_reg, y = v19_pred_his/pred_reg)) +
            geom_count(alpha = 0.5, color = wacolors$skagit[3]) + geom_abline(linetype = 'dashed') +
            coord_fixed() +
            theme_ppmf() + geom_smooth(se = FALSE, color =  wacolors$skagit[6]) +
            theme(legend.position = 'none') +
            labs(x = 'Census 2010', y = 'DAS-19.61', title = 'Predicted Hispanic Registrants')}
ggsave('figs/er_scatter_pct_19.pdf', width = 7.2, height = 3, units = 'in')


# Create tables:
paired_sims <- sims %>% subset_sampled() %>%
    group_by(draw) %>%
    summarize(nmmd_true = sum(min_pct_pred > 0.5),
              nmmd_19 = sum(min_pct_v19pred > 0.5))

pred_prob_min_pct_enacted <- sims %>%
    subset_ref() %>%
    summarize(nmmd_true = sum(min_pct_pred > 0.5),
              nmmd_19 = sum(min_pct_v19pred > 0.5))
saveRDS(pred_prob_min_pct_enacted, file = 'data/ERCSD/min_pct_enacted_19.Rds')


paired_df <- xtabs(~ nmmd_true + nmmd_19, paired_sims) %>%
    prop.table(margin = 1) %>%  # row means
    round(digits = 3) %>%
    cbind(n = table(paired_sims$nmmd_true)) %>%
    as_tibble(rownames = "nmmd_true") %>%
    mutate(`3` = 0) %>%  # add 3 to make it square
    relocate(nmmd_true, `0`, `1`, `2`, `3`) %>%  # reorder
    mutate(across(2:5, .fns = function(x){as.character(round(100*x))}))  # change to string manually
paired_df[1, 2] <- paste0(paired_df[1,2], '%')

saveRDS(paired_df, file = 'data/ERCSD/er_paired_df_19.Rds')
